#########
# This script creates a correspondence table between the person identifier (persnr) and the person's spell identifier (stes_id)
# Author: Jeremias Kläui
########

# Generate one row per persnr + month with the corresponding stes_id.
# If a job seeker has two spells in one month, take the old spell if it ends after the 15th of that month, otherwise take the new one.


library("data.table")
library("stringr")
library("lubridate")

# Start from an empty workspace so that objects left over from an earlier run
# cannot be picked up by this script.
rm(list = ls())

# All file paths below are relative to the folder that contains this script.
# Inside RStudio the working directory is switched to that folder. Outside
# RStudio (e.g. Rscript) there is no editor context to query, so the current
# working directory is kept as is and must already be the script's folder.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getSourceEditorContext()$path
  # The path is empty/NULL when no saved document is open in the editor.
  if (length(script_path) == 1L && nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}
getwd()

# Load the raw STES spell table.
stes <- fread("data_raw/20220329_SECO2KOF_Tab2_STES.csv", encoding = "UTF-8")

# Keep only the person id, the spell id and the spell's start (dat_anmeld)
# and end (dat_abmeld) dates.
stes <- stes[, .(persnr, stes_id, dat_anmeld, dat_abmeld)]

# Coerce both date columns to Date. The two columns are later combined in
# floor_date() / seq(), so they must share the same class regardless of what
# fread guessed for each of them.
stes[, dat_anmeld := as.Date(dat_anmeld)]
stes[, dat_abmeld := as.Date(dat_abmeld)]

# First day of the month in which each spell starts / ends.
stes[, month_start := floor_date(dat_anmeld, "month")]
stes[, month_end := floor_date(dat_abmeld, "month")]

# NA month end = not ended yet, set it to the latest month in the sample.
# The maximum is computed up front on the full table: inside
# stes[is.na(month_end), ...] the j expression only sees the open-spell rows,
# so max() there would ignore every spell that has already ended.
latest_month <- stes[, max(month_start, month_end, na.rm = TRUE)]
stes[is.na(month_end), month_end := latest_month]

# If we have more than one entry per stes_id and persnr, take the earliest
# month_start and the latest month_end. dat_abmeld is aggregated the same way
# so that it stays consistent with month_end on the single row that is kept:
# max() without na.rm yields NA as soon as one entry is still open, otherwise
# the latest end date.
stes[
  ,
  `:=`(
    month_start = min(month_start),
    month_end = max(month_end),
    dat_abmeld = max(dat_abmeld)
  ),
  by = .(persnr, stes_id)
]
stes <- unique(stes, by = c("persnr", "stes_id"))

# Expand the dataset to one row per person * spell * month, covering every
# month from month_start to month_end (not just the start and end month).
# last_date carries the spell's end date (NA for spells without an end date).
persnr_to_stes_id <- stes[
  ,
  .(month = seq(month_start, month_end, by = "month"), last_date = dat_abmeld),
  by = .(persnr, stes_id)
]

# Conflict when a person has more than one spell in a month
# (some spells overlap for one day).
persnr_to_stes_id[, overlap := .N > 1, by = .(persnr, month)]

# A spell "ends early" in a month if it ends in that very month on or before
# the 15th. Spells without an end date never end early; the explicit is.na()
# guard matters because data.table treats an NA condition in i as FALSE, which
# would otherwise silently drop those spells from every conflicting month.
persnr_to_stes_id[
  ,
  ends_early := !is.na(last_date) &
    floor_date(last_date, "month") == month &
    day(last_date) <= 15
]

# In conflicting months, discard the spells that end early ...
persnr_to_stes_id <- persnr_to_stes_id[!(overlap & ends_early)]

# ... and among the remaining candidates keep the old spell, i.e. the one with
# the earliest end date (spells without an end date sort last). This gives:
# old spell ends after the 15th -> old spell, otherwise -> new spell.
setorder(persnr_to_stes_id, persnr, month, last_date, na.last = TRUE)
persnr_to_stes_id <- unique(persnr_to_stes_id, by = c("persnr", "month"))
persnr_to_stes_id <- persnr_to_stes_id[, .(persnr, month, stes_id)]

# Write the persnr * month -> stes_id correspondence table to disk.
out_path <- "data_processed/persnr_to_stes_id.csv"
fwrite(x = persnr_to_stes_id, file = out_path)

